knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE )
# remotes::install_github("fishvice/tidydatras", ref = "dev") library(tidydatras) # remotes::install_github("fishvice/tidydatras") library(tidyverse) library(DescTools) # e.g. winsorize function spatialdir <- "C:/DATA/RDATA" load(file.path(spatialdir, "rect.RData")) datadir <- "C:/DATA/DATRAS/raw" source("../../r/geo_inside.R") # suppressMessages(tidydatras:::dr_download_all(save_dir="C:/TEMP", verbose=FALSE))
Note that the dr_getdata can work with multiple surveys (a loop is inside the function)
sur <- "FR-CGFS" hh <- readr::read_rds(file = paste0(datadir, "/", tolower(sur), "_hh.rds")) %>% dr_tidy() %>% # Make tidy with column specifications dr_idunite(., remove = FALSE) %>% # Add identifier left_join(reco, by=c("ship"="code")) %>% # Add vesselname mutate(year = as.integer(year)) hl <- readr::read_rds(file = paste0(datadir, "/", tolower(sur), "_hl.rds")) %>% dr_tidy() %>% # Make tidy with column specifications dr_idunite(., remove = FALSE) %>% # Add identifier dr_calccpue(hh) %>% # Calculated CPUE per hour left_join(aphia_latin) %>% # Add scientific name left_join(asfis) %>% # Add FAO species names and codes left_join(reco, by=c("ship"="code")) %>% # Add vesselname mutate(year = as.integer(year))
# stations st <- hl |> dplyr::select(survey, id, year, lon=shootlong, lat=shootlat) |> distinct() |> group_by(survey, year) |> mutate(n.tows = n_distinct(id)) |> ungroup() # may need to generate this for each survey year.min <- min(st$year) year.max <- max(st$year) # Check if we get a unique id: if(!st |> nrow() == st |> distinct(id, .keep_all = TRUE) |> nrow()) { warning("This is unexpected, check the code") } le <- hl |> bind_rows() |> dplyr::select(vessel, survey, id, year, lon=shootlong, lat=shootlat, latin, length, n=cpue_number_per_hour) |> filter(length > 0) |> # CHECK upstream why one has NA in n mutate(n = replace_na(n, 0)) |> # I thought this was done upstream mutate(length = floor(length)) |> group_by(vessel, survey, id, year, lon, lat, latin, length) |> summarise(n = sum(n), .groups = "drop") |> # get rid of species where cpue is always zero group_by(survey, latin) |> mutate(n.sum = sum(n)) |> ungroup() |> filter(n.sum > 0) |> select(-n.sum) species_table <- le |> filter(length > 0, n > 0) |> # NOTE: Should one have different species list for different surveys group_by(survey, latin) |> summarise(n.year.pos = sum(n_distinct(year)), .groups = "drop") |> filter(n.year.pos >= 5) |> # only proper species, not genus filter(str_detect(latin, " ")) |> select(latin) |> distinct() |> left_join(tidydatras::asfis) |> mutate(english_name = case_when(species == "PLA" ~ "Long rough dab", species == "MON" ~ "Monkfish", species == "WHB" ~ "Blue whiting", species == "PIL" ~ "Sardine", species == "LUM" ~ "Lumpfish", species == "BIB" ~ "Pouting", species == "POK" ~ "Saithe", species == "USK" ~ "Tusk", species == "MUR" ~ "Red mullet", species == "MAC" ~ "Mackerel", species == "HOM" ~ "Horse mackerel", .default = english_name)) |> arrange(english_name) # testing: species_table |> count(english_name) |> filter(n > 1) LATIN <- species_table$latin names(LATIN) <- species_table$english_name rbyl <- le |> filter(latin %in% LATIN) |> group_by(vessel, survey, year, latin, length) |> # the new kid on the block, here summarise returns a warning reframe(N = sum(n), # total number by length caught in the year (per 60 minute haul) B = sum(n * 0.00001 * length^3)) # mass [kg] ## trim length of species, make as a "plus group" ------------------------------- length.trim <- rbyl |> group_by(latin, length) |> reframe(B = sum(B)) |> arrange(latin, length) |> group_by(latin) |> mutate(cB = cumsum(B), cB.trim = 0.999 * max(cB), length.trim = ifelse(length > 30 & cB > cB.trim, NA, length), length.trim = ifelse(!is.na(length.trim), length.trim, max(length.trim, na.rm = TRUE))) |> select(latin, length, length.trim) |> ungroup() rbyl <- rbyl |> left_join(length.trim) |> mutate(length = length.trim) |> # fill in full cm lengths from min to max witin each species # select(vessel, survey, year, latin, length) |> # step not really needed, just added for clarity group_by(vessel, survey, latin) |> expand(year = full_seq(c(min(year), max(year)), 1), length = full_seq(length, 1)) |> # join back to get the N and B left_join(rbyl) |> mutate(N = replace_na(N, 0), B = replace_na(B, 0)) rbyl <- rbyl |> left_join(st |> count(survey, year, name = "n.tows")) |> # divide total by the number of hauls (or should it by by station?) mutate(n = N / n.tows, b = B / n.tows) |> select(-n.tows) # average over the whole survey time period # results by length, used in the length frequency plot ------------------------- rbl <- rbyl |> group_by(vessel, survey, latin, length) |> reframe(N = mean(N), B = mean(B), n = mean(n), b = mean(b)) # Throw out variables that are not used downstream to speed up the shiny loading rbyl <- rbyl |> select(vessel, survey, latin, year, length, n, b) rbl <- rbl |> select(vessel, survey, latin, length, n, b) rbys <- le |> filter(latin %in% LATIN) |> group_by(vessel, survey, id, year, lon, lat, latin) |> reframe(N = sum(n), B = sum(n * 0.00001 * length^3)) # add zero station rbys <- rbys |> expand(nesting(vessel, survey, id, year, lon, lat), latin) |> # get back N and B left_join(rbys) |> mutate(N = replace_na(N, 0), B = replace_na(B, 0)) my_boot = function(x, times = 100) { # Get column name from input object var = deparse(substitute(x)) var = gsub("^\\.\\$","", var) # Bootstrap 95% CI cis = stats::quantile(replicate(times, mean(sample(x, replace=TRUE))), probs=c(0.025,0.975)) # Return data frame of results data.frame(var, n=length(x), mean=mean(x), lower.ci=cis[1], upper.ci=cis[2]) } print("Bootstrapping abundance:") boot.N <- rbys |> dplyr::group_by(vessel, survey, latin, year) %>% dplyr::do(my_boot(.$N)) %>% dplyr::mutate(variable = "N", var = as.character(variable)) print("Bootstrapping biomass:") boot.B <- rbys %>% dplyr::group_by(vessel, survey, latin, year) %>% dplyr::do(my_boot(.$B)) %>% dplyr::mutate(variable = "B", var = as.character(var)) boot <- bind_rows(boot.N, boot.B) # Throw out variables that are not used downstream to speed up the shiny loading boot <- boot |> select(-n) %>% left_join(species_table) # gplyph plot ------------------------------------------------------------------ # need to create a dummy year before and after for the glyph-plot glyph <- rbys |> mutate(sq = geo::d2ir(lat, lon)) |> group_by(vessel, survey, year, sq, latin) |> summarise(N = mean(N), B = mean(B), .groups = "drop")
select_species <- "HER" select_survey <- "NS-IBTS" select_quarter <- 1 winsorize <- c(0.05, 0.95) length_step <- 0.5
myspecies <- c("MUR", "CTC","MAC","WHG", "HOM", "GUR", "GUG", "GUU", "PLE", "WEG", "SQR", "ANE", "JOD", "POD", "SYC") myvar <- "B" boot %>% dplyr::filter(species %in% myspecies, var == myvar) %>% dplyr::filter(year != 2020) %>% # dplyr::filter(var == myvar) %>% ggplot2::ggplot(ggplot2::aes(year, mean)) + ggplot2::theme_bw(base_size = 16) + ggplot2::theme(legend.position="bottom") + ggplot2::geom_pointrange(ggplot2::aes(year, mean, ymin = lower.ci, ymax = upper.ci, colour=vessel)) + ggplot2::geom_smooth(ggplot2::aes(year, mean, group=vessel, colour=vessel), method="lm", se=FALSE) + ggplot2::scale_x_continuous(breaks = seq(1985, 2025, by = 5)) + ggplot2::expand_limits(y = 0) + ggplot2::labs(x = NULL, y=myvar) + # ggplot2::facet_wrap(~paste(species, english_name), scales="free_y") ggplot2::facet_wrap(~paste(latin, english_name, species), scales="free_y", ncol=5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.